# 0
# setup

# set directory
setwd("/users/samharper/dropbox/-to review/suicide/replication files")

# required packages
library(foreign)
library(doBy)
library(ggplot2)
library(tis)
library(gridExtra)

# 1
# datasets

# Read in Stata dataset for age/sex/race
d <- read.dta("suic-econ-data1-census.dta")

# Read in Stata dataset for education
ded <- read.dta("suic-econ-data2-cps.dta")
# drop 16-24 year olds
de <- subset(ded, agecat!="16 to 24")

# format month/year for R:
d$year2 <- as.character(d$year)
d$month2 <- as.character(d$month)
d$day <- "1"
d$rtime <- paste(d$year2,d$month2,d$day, sep = "-")
d$time <- as.Date(d$rtime, origin="1970-01-01")

# now for education data
# format month/year for R:
de$year2 <- as.character(de$year)
de$month2 <- as.character(de$month)
de$day <- "1"
de$rtime <- paste(de$year2,de$month2,de$day, sep = "-")
de$time <- as.Date(de$rtime, origin="1970-01-01")

# 2
# Create aggregated datasets for calculating suicide rates

# crude suicide rate by month
dsuic <- summaryBy(suic + pop1 ~ time, FUN=sum, data=d)
dsuic$rate <- dsuic$suic.sum / (dsuic$pop1.sum/12) * 100000

# aggregate ICEI (just for fun)
dicei <- summaryBy(icei + unemprtsa ~ time, FUN=mean, data=d)
diceis <- summaryBy(icei + unemprtsa ~ state + time, FUN=mean, data=d)

# aggregate by gender and month
dsex <- summaryBy(suic + pop1 ~ sex + time, FUN=sum, data=d)
dsex$rate <- dsex$suic.sum / (dsex$pop1.sum/12) * 100000

# aggregate by age and month
dage <- summaryBy(suic + pop1 ~ agecat + time, FUN=sum, data=d)
dage$rate <- dage$suic.sum / (dage$pop1.sum/12) * 100000

# aggregate by race and month
drace <- summaryBy(suic + pop1 ~ race2 + time, FUN=sum, data=d)
drace$rate <- drace$suic.sum / (drace$pop1.sum/12) * 100000

# aggregate by education and month
deduc <- summaryBy(suic + pop1 ~ educa3 + time, FUN=sum, data=de)
deduc$rate <- deduc$suic.sum / (deduc$pop1.sum/12) * 100000

# aggregate by state and month
dstate <- summaryBy(suic + pop1 ~ state + time, FUN=sum, data=d)
dstate$rate <- dstate$suic.sum / (dstate$pop1.sum/12) * 100000

# aggregate by Census division and month
# first merge with state geographic codes and regional identifiers
fips <- read.dta("fips.dta")
tg <- merge(d,fips,by.x="state", by.y="stabbc")

stabblist <- unique(tg$stabbc)
stnamelist <- unique(tg$stnamec)
stnum <- unique(tg$state)
rnamelist <- unique(tg$regnamec)
rnum <- unique(tg$reg)
dnamelist <- unique(tg$divnamec)
dnum <- unique(tg$div)

# identify state recession periods
drec <- summaryBy(time ~ state + strec, FUN = function(x) { c(min = min(x), max = max(x)) }, data=d)


## 3
## NBER recession dates
# source: http://www.nber.org/cycles.html
recessions.df = read.table(textConnection(
  "Peak, Trough
  1857-06-01, 1858-12-01
  1860-10-01, 1861-06-01
  1865-04-01, 1867-12-01
  1869-06-01, 1870-12-01
  1873-10-01, 1879-03-01
  1882-03-01, 1885-05-01
  1887-03-01, 1888-04-01
  1890-07-01, 1891-05-01
  1893-01-01, 1894-06-01
  1895-12-01, 1897-06-01
  1899-06-01, 1900-12-01
  1902-09-01, 1904-08-01
  1907-05-01, 1908-06-01
  1910-01-01, 1912-01-01
  1913-01-01, 1914-12-01
  1918-08-01, 1919-03-01
  1920-01-01, 1921-07-01
  1923-05-01, 1924-07-01
  1926-10-01, 1927-11-01
  1929-08-01, 1933-03-01
  1937-05-01, 1938-06-01
  1945-02-01, 1945-10-01
  1948-11-01, 1949-10-01
  1953-07-01, 1954-05-01
  1957-08-01, 1958-04-01
  1960-04-01, 1961-02-01
  1969-12-01, 1970-11-01
  1973-11-01, 1975-03-01
  1980-01-01, 1980-07-01
  1981-07-01, 1982-11-01
  1990-07-01, 1991-03-01
  2001-03-01, 2001-11-01
  2007-12-01, 2009-06-01"), sep=',',
                           colClasses=c('Date', 'Date'), header=TRUE)

# Note that the recession data start long before our suicide data, so let’s trim it to match:

recessions.trim = subset(recessions.df, Peak >= min(d$time))


# 4
# Create plots


# Figure 1
# plot of ICEI over time by state

is <- ggplot() + geom_rect(data=recessions.trim, aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill="grey80", alpha=0.5) + geom_line(data=diceis, size=0.5, aes(x=time, y=icei.mean, group=state), colour="#6699FF") + geom_line(data=dicei, size=2, aes(x=time,y=icei.mean)) + ylab("Index of Coincident Economic Indicators \n (July, 1992=100)") + xlab("") + theme_bw() + theme(axis.text.x = element_text(size = 12), axis.title.y=element_text(size=12, angle=90), axis.text.y = element_text(size = 12), panel.grid.major = element_line(colour = "white")) 


# Figure 2
# set colour palette
cp <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3")

# plot by gender
dsex$s <- factor(dsex$sex,levels=c("1male","2female"),labels=c("Men","Women"))
names(dsex)[names(dsex)=="s"]  <- "Gender"

g <- ggplot(dsex) + geom_rect(data=recessions.trim, aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill="grey80", alpha=0.5) + geom_line(alpha=0.5, aes(x=time, y=rate, colour=`Gender`)) + geom_smooth(size=1, aes(x=time, y=rate, colour=`Gender`, linetype=`Gender`, fill=`Gender`), method=loess, span = 0.25, alpha=0.2) + scale_y_continuous(limits=c(0,32)) + ylab("Annual rate per 100,000") + xlab("") + theme_bw() + scale_colour_manual(values=cp) + scale_fill_manual(values=cp) + theme(axis.text.x = element_text(size = 14), axis.title.y=element_text(size=16, angle=90), axis.text.y = element_text(size = 16), legend.title = element_text(size=16), legend.text=element_text(size=14), legend.key = element_rect(colour = NA), legend.key.width = unit(1, "cm"), panel.grid.major = element_line(colour = "white")) + guides(color=guide_legend(override.aes=list(fill=NA)))

# plot by age
names(dage)[names(dage)=="agecat"]  <- "Age"

a <- ggplot(dage) + geom_rect(data=recessions.trim, aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill="grey80", alpha=0.5) + geom_line(alpha=0.5, aes(x=time, y=rate, colour=`Age`)) + geom_smooth(size=1, aes(x=time, y=rate, colour=`Age`, linetype=`Age`, fill=`Age`), method=loess, span = 0.25, alpha=0.2) + scale_y_continuous(limits=c(0,32)) + ylab("Annual rate per 100,000") + xlab("") + theme_bw() + scale_colour_manual(values=cp) + scale_fill_manual(values=cp) + theme(axis.text.x = element_text(size = 14), axis.title.y=element_text(size=16, angle=90), axis.text.y = element_text(size = 16), legend.title = element_text(size=16), legend.text=element_text(size=14), legend.key = element_rect(colour = NA), legend.key.width = unit(1, "cm"), panel.grid.major = element_line(colour = "white")) + guides(color=guide_legend(override.aes=list(fill=NA)))

# plot by race
drace$r <- factor(drace$race2,levels=c("1White","2OtherRace"),labels=c("White","Non-White"))
names(drace)[names(drace)=="r"]  <- "Race"

r <- ggplot(drace) + geom_rect(data=recessions.trim, aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill="grey80", alpha=0.5) + geom_line(alpha=0.5, aes(x=time, y=rate, colour=`Race`)) + geom_smooth(size=1, aes(x=time, y=rate, colour=`Race`, linetype=`Race`, fill=`Race`), method=loess, span = 0.25, alpha=0.2)  + scale_y_continuous(limits=c(0,32)) + ylab("Annual rate per 100,000") + xlab("") + theme_bw() + scale_colour_manual(values=cp) + scale_fill_manual(values=cp) + theme(axis.text.x = element_text(size = 14), axis.title.y=element_text(size=16, angle=90), axis.text.y = element_text(size = 16), legend.title = element_text(size=16), legend.text=element_text(size=14), legend.key = element_rect(colour = NA), legend.key.width = unit(1, "cm"), panel.grid.major = element_line(colour = "white")) + guides(color=guide_legend(override.aes=list(fill=NA)))



# plot by education
deduc$e <- factor(deduc$educa3,levels=c("<HS","HS Grad","MinsomeCollege/Assoc"),labels=c("<12 yrs","12 yrs", ">12 yrs"))
names(deduc)[names(deduc)=="e"]  <- "Education"

e <- ggplot(deduc) + geom_rect(data=recessions.trim, aes(xmin=Peak, xmax=Trough, ymin=-Inf, ymax=+Inf), fill="grey80", alpha=0.5) + geom_line(alpha=0.5, aes(x=time, y=rate, colour=`Education`)) + geom_smooth(size=1, aes(x=time, y=rate, colour=`Education`, linetype=`Education`, fill=`Education`), method=loess, span = 0.25, alpha=0.2)  + scale_y_continuous(limits=c(0,32)) + ylab("Annual rate per 100,000") + scale_x_date(limits=c(as.Date("1980-01-01"),as.Date("2011-01-01"))) + xlab("") + theme_bw() + scale_colour_manual(values=cp) + scale_fill_manual(values=cp) + theme(axis.text.x = element_text(size = 14), axis.title.y=element_text(size=16, angle=90), axis.text.y = element_text(size = 16), legend.title = element_text(size=16), legend.text=element_text(size=14), legend.key = element_rect(colour = NA), legend.key.width = unit(1, "cm"), panel.grid.major = element_line(colour = "white")) + guides(color=guide_legend(override.aes=list(fill=NA)))




# Combine plots (need to export to PDF to save)
plot_all <- grid.arrange(g,a,r,e,ncol=2)

# Save individual plots
ggsave("suic-fig1s-icei.tiff", is, width=6, height=4, dpi=600)
ggsave("suic-fig2-gender-bw.pdf", g, width=6, height=4, dpi=600)
ggsave("suic-fig2-age-bw.pdf", a, width=6, height=4, dpi=600)
ggsave("suic-fig2-race-bw.pdf", r, width=6, height=4, dpi=600)
ggsave("suic-fig2-educ-bw.pdf", e, width=6, height=4, dpi=600)


